{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Requests\n",
    "\n",
    "它是一个Python第三方库，处理URL资源特别方便\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.6/site-packages/requests/__init__.py:91: RequestsDependencyWarning: urllib3 (1.26.3) or chardet (3.0.4) doesn't match a supported version!\n",
      "  RequestsDependencyWarning)\n"
     ]
    }
   ],
   "source": [
    "import requests      # 如果这里出错，证明你还没有安装这个库"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'requests' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-1-ba735181e3a7>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'https://www.toutiao.com/'\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# 今日头条\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"查看返回状态\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus_code\u001b[0m\u001b[0;34m)\u001b[0m   \u001b[0;31m# 200代表成功 ，404， 403， 501这些意思可以百度查一下\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'requests' is not defined"
     ]
    }
   ],
   "source": [
    "r = requests.get('https://www.toutiao.com/')  # 今日头条\n",
    "print(\"查看返回状态\", r.status_code)   # 200代表成功 ，404， 403， 501这些意思可以百度查一下"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'r' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-1-d8ce6609767b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m)\u001b[0m           \u001b[0;31m# 返回网页的编码: \"utf-8\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \"\"\"\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m: name 'r' is not defined"
     ]
    }
   ],
   "source": [
    "# 查看一下内容\n",
    "\"\"\"\n",
    "print(r.text)               # 返回正常的网页内容, 即解压解码之后的内容\n",
    "print(r.content)            # 返回byte类型的网页内容, 即值解压, 没有解码\n",
    "print(r.json())             # 如果网页内容为json, 直接返回一个json对象\n",
    "print(r.encoding)           # 返回网页的编码: \"utf-8\"\n",
    "\"\"\"/\n",
    "r.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 网页表头信息\n",
    "r.headers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from urllib.parse import urlencode\n",
    "# 获取一些有意思的内容\n",
    "def get_page(offset):\n",
    "    params = {\n",
    "        'offset': offset,\n",
    "        'format': 'json',\n",
    "        'keyword': '搞笑',\n",
    "        'autoload': 'true',\n",
    "        'count': '20',\n",
    "        'cur_tab': '3',\n",
    "        'from': 'gallery',\n",
    "    }\n",
    "    url = 'https://www.toutiao.com/search_content/?' + urlencode(params)\n",
    "    try:\n",
    "        print(url)\n",
    "        response = requests.get(url)\n",
    "        if response.status_code == 200:\n",
    "            return response.json()\n",
    "    except requests.ConnectionError:\n",
    "        return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "contents = get_page(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(contents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 分析一下结构\n",
    "data = contents.get('data')\n",
    "all_images = {}\n",
    "if data:\n",
    "    for item in data:\n",
    "        # print(item)\n",
    "        image_list = item.get('image_list')\n",
    "        title = item.get('title')\n",
    "        item_id = item.get('id')\n",
    "        # print(image_list)\n",
    "        imgs = []\n",
    "        for image in image_list:\n",
    "            imgs.append(image.get('url')[2:])\n",
    "        \n",
    "        all_images[item_id] = {\n",
    "            'title': title,\n",
    "            'images': imgs\n",
    "        }\n",
    "print(all_images)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 练习：保存图片   提示： os.path， 字符串处理（+http， 替换list->large， 文档操作)\n",
    "# 建议使用Pycharm来写"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 补充知识\n",
    "\n",
    "# 不同方式获取网页内容, 返回一个Response对象, 请求的参数可以为url或Request对象\n",
    "r0 = requests.get(\"https://github.com/timeline.json\")\n",
    "r1 = requests.post(\"http://httpbin.org/post\")\n",
    "r2 = requests.put(\"http://httpbin.org/put\")\n",
    "r3 = requests.delete(\"http://httpbin.org/delete\")\n",
    "r4 = requests.head(\"http://httpbin.org/get\")\n",
    "r5 = requests.options(\"http://httpbin.org/get\")\n",
    "r6 = requests.patch(\"http://httpbin.org/get\")\n",
    "\n",
    "# 定制请求头: 一个字典\n",
    "headers = {\"user-agent\": \"my-app/0.0.1\"}\n",
    "r = requests.get(\"https://api.github.com/some/endpoint\", headers=headers)\n",
    "print(r.request.headers)    # 获取request的头部\n",
    "print(r.headers)            # 获取response的头部\n",
    "\n",
    "# 模拟一个手机的ＵＡ\n",
    "# Mozilla/5.0 (Linux; Android 8.1.0; ALP-AL00 Build/HUAWEIALP-AL00; wv) \n",
    "# AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/63.0.3239.83 \n",
    "# Mobile Safari/537.36 T7/10.13 baiduboxapp/10.13.0.11 (Baidu; P1 8.1.0)\n",
    "\n",
    "# {\n",
    "#     \"content-encoding\": \"gzip\",\n",
    "#     \"transfer-encoding\": \"chunked\",\n",
    "#     \"connection\": \"close\",\n",
    "#     \"server\": \"nginx/1.0.4\",\n",
    "#     \"x-runtime\": \"148ms\",\n",
    "#     \"etag\": \"e1ca502697e5c9317743dc078f67693f\",\n",
    "#     \"content-type\": \"application/json\"\n",
    "# }\n",
    "print(r.headers[\"Content-Type\"])        # \"application/json\"\n",
    "print(r.headers.get(\"content-type\"))    # \"application/json\"\n",
    "\n",
    "# 更加复杂的POST请求: 表单\n",
    "post_dict = {\"key1\": \"value1\", \"key2\": \"value2\"}\n",
    "r = requests.post(\"http://httpbin.org/post\", data=post_dict)\n",
    "print(r.text)\n",
    "\n",
    "# 要想发送你的cookies到服务器, 可以使用cookies参数(一个字典)\n",
    "cookies = {\"cookies_are\": \"working\"}\n",
    "r = requests.get(\"http://httpbin.org/cookies\", cookies=cookies)\n",
    "print(r.text)\n",
    "\n",
    "# 会话对象: 会话对象让你能够跨请求保持某些参数, 它也会在同一个Session实例发出的所有请求之间保持cookie\n",
    "s = requests.Session()\n",
    "s.get(\"http://httpbin.org/cookies/set/sessioncookie/123456789\")\n",
    "s.get(\"http://httpbin.org/cookies\")\n",
    "for cookie in s.cookies:\n",
    "    print(cookie)\n",
    "\n",
    "# 如果你要手动为会话添加cookie, 就是用Cookie utility函数来操纵Session.cookies\n",
    "requests.utils.add_dict_to_cookiejar(s.cookies, {\"cookie_key\": \"cookie_value\"})\n",
    "\n",
    "# 会话也可用来为请求方法提供缺省数据, 这是通过为会话对象的属性提供数据来实现的\n",
    "s.auth = (\"user\", \"pass\")\n",
    "s.headers.update({\"x-test\": \"true\"})\n",
    "s.get(\"http://httpbin.org/headers\", headers={\"x-test2\": \"true\"})\n",
    "# both \"x-test\" and \"x-test2\" are sent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s = requests.Session()\n",
    "s.get(\"http://httpbin.org/cookies/set/sessioncookie/123456789\")\n",
    "s.get(\"http://httpbin.org/cookies\")\n",
    "for cookie in s.cookies:\n",
    "    print(cookie)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "requests.utils.add_dict_to_cookiejar(s.cookies, {\"cookie_key\": \"cookie_value\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
